import seaborn
from collections import Counter
%run tfa_phase1.py
# Build the working dataset with create_TrendDF(2009, 2012). The call returns
# two objects: dfX (exposes a pandas DataFrame as .df) and model_base (queried
# below through most_similar()).
# NOTE(review): create_TrendDF is presumably defined by the %run of
# tfa_phase1.py, and the arguments presumably form a year range -- confirm.
dfX, model_base = create_TrendDF(2009, 2012)
# Peek at the first rows and the total row count.
dfX.df.head()
len(dfX.df)
def make_year(df, year):
    """Return the rows of ``df`` whose ``year`` column equals ``year``.

    Args:
        df: pandas DataFrame exposing a ``year`` column.
        year: value to match against that column.

    Returns:
        The matching rows selected with ``.loc`` (original index labels are
        kept); an empty frame when nothing matches.
    """
    return df.loc[df.year == year]
def make_quarter(df, year, quarter):
    """Return the rows of ``df`` for one ``quarter`` of one ``year``.

    Args:
        df: pandas DataFrame exposing ``year`` and ``quarter`` columns.
        year: value to match against the ``year`` column.
        quarter: value to match against the ``quarter`` column.

    Returns:
        The rows where both columns match, selected with ``.loc`` (original
        index labels are kept); an empty frame when nothing matches.
    """
    # One combined boolean mask selects the same rows as filtering by year
    # first and by quarter second, without building the intermediate frame.
    mask = (df.year == year) & (df.quarter == quarter)
    return df.loc[mask]
# One TrendDF per year, each wrapping the rows of dfX.df whose `year` column
# matches that year.
df2009, df2010, df2011, df2012 = (
    TrendDF(make_year(dfX.df, yr)) for yr in (2009, 2010, 2011, 2012)
)
df2012.df.head()
# Row counts per year, in chronological order.
tuple(len(yearly.df) for yearly in (df2009, df2010, df2011, df2012))
# Spot-check the model by asking for the terms most similar to three probe
# words.
# NOTE(review): most_similar() looks like the gensim word-vector API --
# confirm against tfa_phase1.py.
model_base.most_similar('dress')
model_base.most_similar('cotton')
model_base.most_similar('zara')
# Bigrams over the whole 2009-2012 set, using a fixed threshold of 500.
# NOTE(review): presumably the threshold is a minimum occurrence count --
# confirm in TrendDF.find_all_bigrams_above_threshold.
dfX.find_all_bigrams_above_threshold(500)
# Find all garment phrases whose count reaches 0.05% of that year's rows,
# i.e. roughly 1 mention per 2000 posts (0.0005 == 1/2000).
# Each year's threshold is scaled by that year's own row count; the 2012
# threshold was previously scaled by the 2009 row count by mistake.
min_post_share = .0005
top_2009 = df2009.find_all_bigrams_above_threshold(min_post_share * len(df2009.df))
top_2012 = df2012.find_all_bigrams_above_threshold(min_post_share * len(df2012.df))
top_2009
top_2012
# Phrases that were popular in 2012 but not in 2009 - suggestive of potential
# trends. Computed once, then printed; print(x) with a single argument gives
# the same output under Python 2 and Python 3.
potential_trends_2012 = [g for g in top_2012 if g not in top_2009]
print(potential_trends_2012)
%run Trend.py
def make_trends(trend_list_str):
    """Turn underscore-joined phrase keys into ``Trend`` objects.

    Every ``_`` in a key becomes a space, so ``'floral_pants'`` yields
    ``Trend('floral pants')``. All tokens are kept: a key with more than two
    parts is no longer cut down to its first two words, and a key without any
    underscore is passed through unchanged instead of raising ``IndexError``.

    Args:
        trend_list_str: iterable of phrase strings using ``_`` as the word
            separator (lists and sets are both passed by this script).

    Returns:
        A list with one ``Trend`` per input phrase, in iteration order.
    """
    return [Trend(' '.join(phrase.split('_'))) for phrase in trend_list_str]
# Wrap each candidate 2012 phrase in a Trend object so it can be plotted.
trend_list_2012 = make_trends(potential_trends_2012)
def show_plot(highlight_year=None):
    """Finish and display the current matplotlib figure.

    Adds the legend, slants the x tick labels, optionally shades a period
    starting at ``highlight_year``, labels the y axis and calls ``plt.show()``.

    Args:
        highlight_year: optional integer year. When truthy, two grey spans
            are drawn from 1 January of that year: a darker one covering one
            year and a lighter one covering two years, so the first year
            appears more strongly shaded. ``None`` (the default) draws no
            shading.
    """
    # pd.datetime was only an alias of the stdlib class; it was deprecated in
    # pandas 1.0 and removed in 2.0, so use the stdlib class directly.
    from datetime import datetime

    plt.legend()
    plt.xticks(rotation=-35, ha='left')
    if highlight_year:
        span_start = datetime(highlight_year, 1, 1)
        plt.axvspan(span_start, datetime(highlight_year + 1, 1, 1),
                    color='grey', alpha=0.6)
        plt.axvspan(span_start, datetime(highlight_year + 2, 1, 1),
                    color='grey', alpha=0.3)
    seaborn.set(rc={'figure.facecolor': 'white'})
    # NOTE(review): get_cmap() only returns a colormap object and the result
    # is discarded here, so this call looks like a no-op -- confirm intent.
    plt.get_cmap('spring')
    plt.ylabel("term frequency / total posts")
    plt.show()
# Plot every candidate 2012 trend against the full 2009-2012 frame, then
# finish the figure with show_plot().
# NOTE(review): indentation was lost in this export, so the extent of the loop
# body is ambiguous -- presumably only plot_by_month() runs per trend and
# show_plot() runs once afterwards; confirm. show_plot() already calls
# plt.legend() itself.
# NOTE(review): the arguments (1, 2009, 48) are presumably start month, start
# year and number of months -- confirm in Trend.plot_by_month.
for trend in trend_list_2012:
trend.plot_by_month(dfX.df, 1, 2009, 48)
plt.legend()
show_plot()
# Which leading words (the adjectives) show up in more than one of the 2012
# candidate phrases?
descriptors_2012 = [item.phrase.split()[0] for item in trend_list_2012]
count_descriptors = Counter(descriptors_2012)
[pair for pair in count_descriptors.iteritems() if pair[1] > 1]
# Inspect the 2012 counts: (key, value) pairs for keys that contain both
# 'peplum' and an underscore and whose value exceeds 1.
[(k, v) for k, v in df2012.count.iteritems() if 'peplum' in k and '_' in k and v > 1]
# Same filter, keeping only the keys, which are then wrapped as Trend objects.
peplum_list = [k for k, v in df2012.count.iteritems() if 'peplum' in k and '_' in k and v > 1]
peplum_list_to_plot = make_trends(peplum_list)
# NOTE(review): indentation was lost in this export, so the loop extent is
# ambiguous -- presumably only plot_by_month() is in the loop body and
# show_plot() runs once afterwards; confirm.
for p in peplum_list_to_plot:
p.plot_by_month(dfX.df, 1, 2009, 48)
plt.legend()
show_plot()
# Inspect the 2012 counts for keys containing both 'floral' and an underscore
# whose value exceeds 10.
[(k, v) for k, v in df2012.count.iteritems() if 'floral' in k and '_' in k and v > 10]
# The list that is actually plotted uses a stricter cut-off (> 50) than the
# inspection line above (> 10).
# NOTE(review): presumably deliberate, to keep the plot readable -- confirm.
floral_list = [k for k, v in df2012.count.iteritems() if 'floral' in k and '_' in k and v > 50]
floral_list_to_plot = make_trends(floral_list)
# NOTE(review): indentation was lost in this export, so the loop extent is
# ambiguous -- presumably only plot_by_month() is in the loop body and
# show_plot() runs once afterwards; confirm.
for f in floral_list_to_plot:
f.plot_by_month(dfX.df, 1, 2009, 48)
plt.legend()
show_plot()
# At first glance, this makes it look like florals aren't actually a trend;
# however, the green line, representing 'floral pants', has a major peak in
# summer 2012. Let's look more closely at other floral, pants-like garments.
# Terms the base model ranks closest to 'pants'; only the first field of each
# most_similar() result is kept.
like_pants = [hit[0] for hit in model_base.most_similar('pants')]
like_pants
# Collect every underscore-joined 2012 key that mentions 'floral' together
# with one of those terms and whose value exceeds 1 (the set removes repeats).
floral_pants = set()
for garment in like_pants:
    floral_pants.update(
        key for key, n in df2012.count.iteritems()
        if 'floral' in key and garment in key and '_' in key and n > 1
    )
floral_pants
# Wrap the collected floral keys as Trend objects and plot them against the
# 2009-2012 frame.
floral_pants_to_plot = make_trends(floral_pants)
# NOTE(review): indentation was lost in this export, so the loop extent is
# ambiguous -- presumably only plot_by_month() is in the loop body and
# show_plot() runs once afterwards; confirm.
for f in floral_pants_to_plot:
f.plot_by_month(dfX.df, 1, 2009, 48)
show_plot()
# Inspect every 2012 key mentioning 'leggings' whose value exceeds 15; this
# line does not require an underscore, so single-word keys are shown too.
[(k, v) for k, v in df2012.count.iteritems() if 'leggings' in k and v > 15]
# For plotting, keep only the underscore-joined keys above the same cut-off.
leggings = [k for k, v in df2012.count.iteritems() if 'leggings' in k \
and '_' in k and v > 15]
leggings_to_plot = make_trends(leggings)
# NOTE(review): indentation was lost in this export, so the loop extent is
# ambiguous -- presumably only plot_by_month() is in the loop body and
# show_plot() runs once afterwards; confirm.
for l in leggings_to_plot:
l.plot_by_month(dfX.df, 1, 2009, 48)
show_plot()
# Plot the single term 'galaxy' against the 2009-2012 frame.
galaxy = Trend('galaxy')
galaxy.plot_by_month(dfX.df, 1, 2009, 48)
show_plot()
# List the underscore-joined 2012 keys that mention 'galaxy', with their
# values (no minimum value is applied here).
[(k, v) for k, v in df2012.count.iteritems() if 'galaxy' in k and '_' in k]
# Build a second dataset with create_TrendDF(2013, 2013) and append its frame
# to the 2009-2012 frame.
# NOTE(review): the second return value (`model`) is not used in the code
# visible here.
dfY, model = create_TrendDF(2013, 2013)
dfX_Y = pd.concat([dfX.df, dfY.df])
# Re-plot 'galaxy' on the combined frame with 60 instead of 48 as the last
# argument, shading the period that starts in 2012.
galaxy.plot_by_month(dfX_Y, 1, 2009, 60)
show_plot(2012)
# Same view for 'leather leggings'.
leather_leggings = Trend('leather leggings')
leather_leggings.plot_by_month(dfX_Y, 1, 2009, 60)
show_plot(2012)
def plot_with_highlight(garment, highlight_year=2012):
    """Plot one phrase on the combined frame and shade a highlighted period.

    Builds a ``Trend`` for ``garment``, plots it against the module-level
    ``dfX_Y`` frame with the arguments ``(1, 2009, 60)``, then finishes the
    figure through ``show_plot``.

    Args:
        garment: phrase to plot, e.g. ``'peplum top'``.
        highlight_year: year handed to ``show_plot`` for shading. Defaults to
            2012, the value that used to be hard-coded, so existing
            one-argument calls behave as before.
    """
    Trend(garment).plot_by_month(dfX_Y, 1, 2009, 60)
    show_plot(highlight_year)
# Phrases picked for a closer look; each one is plotted through
# plot_with_highlight().
interesting_terms = ['peplum top', 'floral pants', 'chiffon', 'mullet skirt', 'wedge sneaker']
for term in interesting_terms:
plot_with_highlight(term)